%load_ext pretty_jupyter
# import packages
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
import json

sparqlep = "http://graph.oceaninfohub.org/blazegraph/namespace/oih/sparql"
from minio import Minio

def publicurls(client, bucket, prefix):
    """Return download URLs for every non-empty object below ``prefix``.

    Args:
        client: Object-store client exposing ``list_objects``,
            ``stat_object`` and ``presigned_get_object``.
        bucket: Name of the bucket to walk.
        prefix: Key prefix; the listing is recursive below it.

    Returns:
        A list with one URL per object whose stat'ed size is greater than
        zero, in listing order.
    """
    names = (
        entry.object_name
        for entry in client.list_objects(bucket, prefix=prefix, recursive=True)
    )
    # NOTE(review): the size test only filters out empty objects. Whether an
    # object is actually publicly readable is not checked here -- still an
    # open question.
    return [
        client.presigned_get_object(bucket, name)
        for name in names
        if client.stat_object(bucket, name).size > 0
    ]

# Shared object-store client used by the cells below. No access key or secret
# key is passed, so requests are made anonymously.
# NOTE(review): secure=False with port 80 presumably means plain HTTP rather
# than TLS -- confirm this is intended for the public endpoint.
client = Minio("ossapi.oceaninfohub.org:80",  secure=False) # Create client with anonymous access.
def get_sparql_dataframe(service, query):
    """
    Run ``query`` against the SPARQL endpoint ``service`` and return the
    result bindings as a Pandas data frame.

    The frame has one column per variable listed in the response head and
    one row per binding. A variable that is missing from a binding yields
    ``None`` in that cell.
    """
    endpoint = SPARQLWrapper(service)
    endpoint.setQuery(query)
    endpoint.setReturnFormat(JSON)

    # Decode the raw JSON response body ourselves.
    payload = json.load(endpoint.query().response)
    columns = payload['head']['vars']

    records = [
        [binding.get(name, {}).get('value') for name in columns]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=columns)

About

This is the introduction to the Ocean InfoHub Release Graph.

In addition to this HTML file, we want to package:

  • a PDF version of this document
  • the graphs
  • the original Jupyter Notebook that builds the HTML and PDF
  • any JSON-LD frames or SHACL files used in generating this document

Resource Links

This is our first section. We use so-called Jinja Markdown here. It lets us combine Markdown with Python variables, which makes for a more dynamic report.

We can, for example, print the pandas version like this: 1.5.3.

Providers

name (graph alias) catalog logo
IOC Africa Data Portal (africaioc) catalog
AquaDocs (aquadocs) catalog
Better Biomolecular Ocean Practices (BeBOP) as part of Ocean Biomolecular Observing Network (OBON) (bebop) catalog
Benguela Current Convention (BCC) GeoData Portal (benguelacc) catalog
Caribbean Marine Atlas catalogue (caribbeanmarineatlas) catalog
CIOOS (cioos) catalog
European Directory of Marine Environmental Research Projects (EDMERP) SeaDataNet (edmerp) catalog
European Directory of Marine Organisations (EDMO) SeaDataNet (edmo) catalog
EurOcean Organizations (euroceanorgs) catalog
EurOcean Projects (euroceanprojects) catalog
EurOcean Vessels (euroceanvessels) catalog
European Marine Observation and Data Network catalogue (emodnet) catalog
Indonesia National Oceanic Data Center (inanodc) catalog
CHM LAC - Documents (invemardocuments) catalog
CHM LAC - Experts (invemarexperts) catalog
CHM LAC - Institutions (invemarinstitutions) catalog
CHM LAC - Training (invemartraining) catalog
CHM LAC - Vessels (invemarvessels) catalog
Marine Training EU (marinetraining) catalog
MASPAWIO - Marine Spatial Atlas for the Western Indian Ocean (maspawio) catalog
Ocean Biodiversity Information System (obis) catalog
provider by graph alias size date link
africaioc 707451 2023-03-23 17:18:18+00:00 summonedafricaioc_v1_release.nq
aquadocs 211713813 2023-03-23 17:18:18+00:00 summonedaquadocs_v1_release.nq
cioos 42653232 2023-03-23 17:18:23+00:00 summonedcioos_v1_release.nq
edmerp 17165450 2023-03-23 17:18:24+00:00 summonededmerp_v1_release.nq
edmo 21312369 2023-03-23 17:18:25+00:00 summonededmo_v1_release.nq
emodnet 312445 2023-03-23 17:18:26+00:00 summonedemodnet_v1_release.nq
inanodc 120868 2023-03-23 17:18:26+00:00 summonedinanodc_v1_release.nq
invemardocuments 30805743 2023-03-23 17:18:27+00:00 summonedinvemardocuments_v1_release.nq
invemarexperts 3337999 2023-03-23 17:18:28+00:00 summonedinvemarexperts_v1_release.nq
invemarinstitutions 730445 2023-03-23 17:18:28+00:00 summonedinvemarinstitutions_v1_release.nq
invemartraining 1435575 2023-03-23 17:18:28+00:00 summonedinvemartraining_v1_release.nq
invemarvessels 272856 2023-03-23 17:18:29+00:00 summonedinvemarvessels_v1_release.nq
marinetraining 2618862 2023-03-23 17:18:29+00:00 summonedmarinetraining_v1_release.nq
obis 43477772 2023-03-23 17:18:29+00:00 summonedobis_v1_release.nq
obps 12110954 2023-03-23 17:18:30+00:00 summonedobps_v1_release.nq
oceanexperts 180515480 2023-03-23 17:18:31+00:00 summonedoceanexperts_v1_release.nq
# Collect the URLs of the objects stored under the "graph" prefix of the
# "public" bucket (publicurls decides which objects are included).
urls = publicurls(client, "public", "graph")

Tabset

First Tab

The content of this first section will be rendered in the first tab.

Second Tab

The same goes for the second section.

# SPARQL query: for every predicate ?p, count the triples that use it.
rq_pcount = """SELECT ?p (COUNT(?p) as ?pCount)
WHERE
{
  ?s ?p ?o .
}
GROUP BY ?p
"""
# Run the query against the endpoint configured in sparqlep. The cell values
# are taken straight from the JSON bindings, hence the explicit integer cast
# before sorting.
dfc = get_sparql_dataframe(sparqlep, rq_pcount)
dfc['pCount'] = dfc["pCount"].astype(int) # convert count to int
# dfc.set_index('p', inplace=True)
# Most frequently used predicates first.
dfc_sorted = dfc.sort_values('pCount', ascending=False)

countByLicense.rq

p pCount
154 http://www.w3.org/1999/02/22-rdf-syntax-ns#type 7914266
75 http://www.w3.org/ns/prov#value 2554814
74 http://www.w3.org/ns/prov#used 1277407
73 http://www.w3.org/ns/prov#hadMember 1277407
72 http://www.w3.org/ns/prov#generated 1277407